####preparing, plotting and analyzing ica data

#1# build time windows
#2# extract outliers
#3# plot data
#4# analyse data 
    #overall 
    #adults
    #children

library(lme4)
library(ggplot2)
library(psych)
library(lattice)
library(influence.ME)
library(plyr)
library(dplyr)
library(tidyr)
library(tibble)
library(readr)
library(anytime)
library(languageR)
library(stringr)
library(Rmisc)
library(psych)
library(lmerTest)
library(reshape)
library(ez)
options(scipen =999)




#################################################################################################################################################
### PREPARING ###################################################################################################################################
#################################################################################################################################################

setwd("")
d <- read.csv2("ica_raw data.csv") 




##########################
### BUILD TIME WINDOWS ### (600ms in lenght, non overlapping, starting after first 100ms of word - but directly in postview)
##########################

d <- mutate(d, sw = S2+S3+S4+S5+S6+S7)
d <- mutate(d, vw = V2+V3+V4+V5+V6+V7)
d <- mutate(d, gw = G2+G3+G4+G5+G6+G7)
d <- mutate(d, nw = N2+N3+N4+N5+N6+N7)
d <- mutate(d, pw = Post_Merged1+Post_Merged2+Post_Merged3+Post_Merged4+Post_Merged5+Post_Merged6)

d <- d[,c("subject", "group", "item", "condition", "sw", "vw", "gw", "nw", "pw", "accu", "accu_score", "age", "gender", "flu_names", "flu_cats", "flu_jumps", "csst", "ppvt")]




###########################################
### OUTLIER REMOVAL WITH 1.5*IQR METHOD ### (separately for each group and condition)
###########################################

d <- unite(d, gro_con, group, condition, sep = "_", remove = FALSE)


###SUBJECT WINDOW 
d <- d %>% 
  group_by(gro_con)%>% 
  mutate(swmean = mean(sw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(swIQR = IQR(sw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(swQ1 = quantile(sw, probs = .25))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(swQ3 = quantile(sw, probs = .75))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(sw_clean = ifelse(sw < swQ1 - 1.5 * swIQR | sw > swQ3 + 1.5 * swIQR, swmean, sw))


### VERB WINDOW 
d <- d %>% 
  group_by(gro_con)%>% 
  mutate(vwmean = mean(vw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(vwIQR = IQR(vw, na.rm = TRUE))

d$vwQ1 <- NA
d <- d %>% 
  group_by(gro_con)%>% 
  mutate(vwQ1 = quantile(vw, probs = .25))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(vwQ3 = quantile(vw, probs = .75))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(vw_clean = ifelse(vw < vwQ1 - 1.5 * vwIQR | vw > vwQ3 + 1.5 * vwIQR, vwmean, vw))


### GLEICH WINDOW 
d <- d %>% 
  group_by(gro_con)%>% 
  mutate(gwmean = mean(gw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(gwIQR = IQR(gw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(gwQ1 = quantile(gw, probs = .25))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(gwQ3 = quantile(gw, probs = .75))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(gw_clean = ifelse(gw < gwQ1 - 1.5 * gwIQR | gw > gwQ3 + 1.5 * gwIQR, gwmean, gw))


### NOUN WINDOW
d <- d %>% 
  group_by(gro_con)%>% 
  mutate(nwmean = mean(nw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(nwIQR = IQR(nw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(nwQ1 = quantile(nw, probs = .25))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(nwQ3 = quantile(nw, probs = .75))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(nw_clean = ifelse(nw < nwQ1 - 1.5 * nwIQR | nw > nwQ3 + 1.5 * nwIQR, nwmean, nw))


### POST WINDOW
d <- d %>% 
  group_by(gro_con)%>% 
  mutate(pwmean = mean(pw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(pwIQR = IQR(pw, na.rm = TRUE))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(pwQ1 = quantile(pw, probs = .25))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(pwQ3 = quantile(pw, probs = .75))

d <- d %>% 
  group_by(gro_con)%>% 
  mutate(pw_clean = ifelse(pw < pwQ1 - 1.5 * pwIQR | pw > pwQ3 + 1.5 * pwIQR, pwmean, pw))


#rounding data, necessary for analyses  
d$sw_clean <- round(d$sw_clean, digits = 0)
d$vw_clean <- round(d$vw_clean, digits = 0)
d$gw_clean <- round(d$gw_clean, digits = 0)
d$nw_clean <- round(d$nw_clean, digits = 0)
d$pw_clean <- round(d$pw_clean, digits = 0)


#saving file which is ready for analysis
d <- d[,c("subject", "group", "item", "condition", "sw_clean", "vw_clean", "gw_clean", "nw_clean", "pw_clean", "accu", "accu_score", "age", "gender", "flu_names", "flu_cats", "flu_jumps", "csst", "ppvt")]
write.csv2(d, "ica_ready2analyze.csv")




##########################################
### BRING DATA IN LONG FORMAT FOR PLOT ###
##########################################

p <- d[,c("subject", "group", "item", "condition", "sw_clean", "vw_clean", "gw_clean", "nw_clean", "pw_clean", "accu", "age", "gender", "flu_names", "flu_cats", "flu_jumps", "csst", "ppvt")]

p$sw_clean <- as.numeric(p$sw_clean)
p$vw_clean <- as.numeric(p$vw_clean)
p$gw_clean <- as.numeric(p$gw_clean)
p$nw_clean <- as.numeric(p$nw_clean)
p$pw_clean <- as.numeric(p$pw_clean)

p$w01 <- p$sw_clean
p$w02 <- p$vw_clean
p$w03 <- p$gw_clean
p$w04 <- p$nw_clean
p$w05 <- p$pw_clean

p <- gather(p, window, ica, w01:w05)
p <- p[,c("subject", "group", "item", "condition", "window", "ica")]




#################
### PLOT DATA ###
#################

p <- rename(p, replace = c("condition" = "Condition"))
p <- mutate(p, window = ifelse(p$window == "w01", "1", 
                        ifelse(p$window == "w02", "2", 
                        ifelse(p$window == "w03", "3", 
                        ifelse(p$window == "w04", "4",
                        ifelse(p$window == "w05", "5", NA))))))

p$window <- as.factor(p$window)
p$Condition <- as.factor(p$Condition)
p$ica <- as.numeric(p$ica)

plot <- summarySE(p, measurevar="ica", groupvars=c("Condition", "window", "group"), na.rm=T)

ggplot(plot, aes(x=window, y=ica, group=Condition, color=Condition, fill = Condition))+
  geom_line(stat = "identity")+
  geom_errorbar(position=position_dodge(.0),width=.2, aes(ymin=ica-se, ymax=ica+se))+
  theme_bw()+ 
  facet_grid(~group) +
  geom_point(size=2) +
  geom_line(size =0.5) +
  theme(strip.background = element_rect(color="black", fill="white", size=0.5, linetype="solid"))+
  theme(strip.text.x = element_text(size = 12, color = "black"))+ 
  ggtitle("Index of Cognitive Activity")+
  theme(plot.title = element_text(face = "bold", size = 14))+
  xlab("Window") +
  ylab("Mean ICA values")+
  theme(axis.title.x = element_text(size = 12))+
  theme(axis.title.y = element_text(size = 12))+
  theme(axis.text.x = element_text(size =10))+
  theme(axis.text.y = element_text(size =10))+
  theme(axis.title.x = element_text(vjust = -2))+ 
  theme(axis.title.y = element_text(vjust = 3))+ 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.3))+
  theme(aspect.ratio = 2/1)+ 
  scale_x_discrete(labels=c("1" = "Subject", "2" = "Verb","3" = "Spill-over", "4" = "Noun", "5" = "Postview"))+ 
  scale_color_manual(values=c("darkred", "darkorange", "blue", "darkolivegreen"), labels=c("0", "1", "3", "4")) +
  scale_y_continuous(breaks = seq(0, 50, by = 5), expand = c(0,0)) 

#ggsave("ica plot_ica.png")#,  height = 7, width = 8)




#################################################################################################################################################
### ANAlYSES ####################################################################################################################################
#################################################################################################################################################

r <- read.csv2("ica_ready2analyze.csv")
r <- rename(r, replace = c("Condition" = "condition"))




##############################
### DESCREPTIVE STATISTICS ### (per group and condition)
##############################

r <- unite(r, gro_con, group, condition, sep = "_", remove = FALSE)

summarize(group_by(r, gro_con), m=mean(sw_clean, na.rm = TRUE ), sd=sd(sw_clean, na.rm = TRUE))
summarize(group_by(r, gro_con), m=mean(vw_clean, na.rm = TRUE ), sd=sd(vw_clean, na.rm = TRUE))
summarize(group_by(r, gro_con), m=mean(gw_clean, na.rm = TRUE ), sd=sd(gw_clean, na.rm = TRUE))
summarize(group_by(r, gro_con), m=mean(nw_clean, na.rm = TRUE ), sd=sd(nw_clean, na.rm = TRUE))
summarize(group_by(r, gro_con), m=mean(pw_clean, na.rm = TRUE ), sd=sd(pw_clean, na.rm = TRUE))

summarize(group_by(r, condition), m=mean(sw_clean, na.rm = TRUE ), sd=sd(sw_clean, na.rm = TRUE))
summarize(group_by(r, condition), m=mean(vw_clean, na.rm = TRUE ), sd=sd(vw_clean, na.rm = TRUE))
summarize(group_by(r, condition), m=mean(gw_clean, na.rm = TRUE ), sd=sd(gw_clean, na.rm = TRUE))
summarize(group_by(r, condition), m=mean(nw_clean, na.rm = TRUE ), sd=sd(nw_clean, na.rm = TRUE))
summarize(group_by(r, condition), m=mean(pw_clean, na.rm = TRUE ), sd=sd(pw_clean, na.rm = TRUE))




#####################
### EFFECT CODING ###
#####################

r <- mutate(r, v1 = ifelse(condition == "0", 3/4, -1/4),                                                            #condition 0 vs. 1, 3 & 4
               v2 = ifelse(condition == "0", 0, ifelse(condition == "1", 2/3, -1/3)),                               #condition 1 vs. 3 & 4
               v3 = ifelse(condition == "0", 0, ifelse(condition == "1", 0, ifelse(condition == "3", 1/2, -1/2))))  #condition 3 vs. 4

r <- mutate(r, a = ifelse(group == "Adults", 1/2, -1/2))   #adults vs. children




#####################
### OVERALL GLMER ### (those that converged; separately for each time window with the effect coded factors condition and age group)
#####################

### SUBJECT WINDOW 
subject <- glmer(sw_clean ~ (v1+v2+v3) * a +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = r) 
summary(subject)
confint(subject)


#### VERB WINDOW 
verb <- glmer(vw_clean ~ (v1+v2+v3) * a +
                (1 + (v1+v2+v3) || subject)+
                (1 + (v1+v2+v3) || item),
              family = poisson(link = "log"),
              data = r) 
summary(verb)
confint(verb)


### GLEICH WINDOW 
gleich <- glmer(gw_clean ~ (v1+v2+v3) * a  +
                  (1 + (v1+v2+v3) || subject)+
                  (1 + (v1+v2+v3) || item),
                family = poisson(link = "log"),
                data = r) 
summary(gleich)
confint(gleich)


### NOUN WINDOW 
noun <- glmer(nw_clean ~ (v1+v2+v3) * a+
                (1 + (v1+v2+v3) || subject)+
                (1 + (v1+v2+v3) || item),
              family = poisson(link = "log"),
              data = r) 
summary(noun)
confint(noun)


### POST WINDOW 
post <- glmer(pw_clean ~ (v1+v2+v3) * a+
                (1 + (v1+v2+v3) || subject)+
                (1 + (v1+v2+v3) || item),
              family = poisson(link = "log"),
              data = r) 
summary(post)
confint(post)




###########################
### GLMER - ADULTS ONLY ### (those that converged; separately for each time window with the effect coded factor condition)
###########################

a <- subset(r, group == "Adults")
a$sw_clean <- as.integer(a$sw_clean)
a$vw_clean <- as.integer(a$vw_clean)
a$gw_clean <- as.integer(a$gw_clean)
a$nw_clean <- as.integer(a$nw_clean)
a$pw_clean <- as.integer(a$pw_clean)


### SUBJECT WINDOW
sub_ad <- glmer(sw_clean ~ (v1+v2+v3) +
                  (1 + (v1+v2+v3) || subject)+
                  (1 + (v1+v2+v3) || item),
                family = poisson(link = "log"),
                data = a) 
summary(sub_ad)
confint(sub_ad)


### VERB WINDOW
verb_ad <- glmer(vw_clean ~ (v1+v2+v3) +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = a) 
summary(verb_ad)
confint(verb_ad)


### GLEICH WINDOW
gleich_ad <- glmer(gw_clean ~ (v1+v2+v3) +
                     (1 + (v1+v2+v3) || subject)+
                     (1 + (v1+v2+v3) || item),
                   family = poisson(link = "log"),
                   data = a) 
summary(gleich_ad)
confint(gleich_ad)


### NOUN WINDOW
noun_ad <- glmer(nw_clean ~ (v1+v2+v3) +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = a) 
summary(noun_ad)
confint(noun_ad)


### POST WINDOW
post_ad <- glmer(pw_clean ~ (v1+v2+v3) +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = a) 
summary(post_ad)
confint(post_ad)




#############################
### GLMER - CHILDREN ONLY ### (those that converged; separately for each time window with the effect coded factor condition)
#############################

k <- subset(r, group == "Children")
k$sw_clean <- as.integer(k$sw_clean)
k$vw_clean <- as.integer(k$vw_clean)
k$gw_clean <- as.integer(k$gw_clean)
k$nw_clean <- as.integer(k$nw_clean)
k$pw_clean <- as.integer(k$pw_clean)


### SUBJECT WINDOW
sub_ki <- glmer(sw_clean ~ (v1+v2+v3) +
                  (1 + (v1+v2+v3) || subject)+
                  (1 + (v1+v2+v3) || item),
                family = poisson(link = "log"),
                data = k) 
summary(sub_ki)
confint(sub_ki)


### VERB WINDOW
verb_ki <- glmer(vw_clean ~ (v1+v2+v3) +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = k) 
summary(verb_ki)
confint(verb_ki)


### GLEICH WINDOW
gleich_ki <- glmer(gw_clean ~ (v1+v2+v3) +
                     (1 + (v1+v2+v3) || subject)+
                     (1 + (v1+v2+v3) || item),
                   family = poisson(link = "log"),
                   data = k) 
summary(gleich_ki)
confint(gleich_ki)


### NOUN WINDOW
noun_ki <- glmer(nw_clean ~ (v1+v2+v3) +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = k) 
summary(noun_ki)
confint(noun_ki)


### POST WINDOW
post_ki <- glmer(pw_clean ~ (v1+v2+v3) +
                   (1 + (v1+v2+v3) || subject)+
                   (1 + (v1+v2+v3) || item),
                 family = poisson(link = "log"),
                 data = k) 
summary(post_ki)
confint(post_ki)